__author__ = '85359_000'
#encoding=utf-8
import numpy as np
import os
import pickle

# Input/output locations are resolved relative to the PARENT of the current
# working directory, so the script must be launched from a direct
# sub-directory of the project root.
path_current = os.getcwd()
path_parent = os.path.dirname(path_current)

# Action log. The handle is kept open at module level because
# features_export() iterates over it later.
mixInfo = open(path_parent+"/data/sample_log.csv")

# Every category ID seen so far (filled in by features_export).
allCatId = {}
# Every seller ID seen so far (filled in by features_export).
allSellerId = {}
# Every brand ID seen so far (filled in by features_export).
allBrandId = {}
# Mapping userID -> label, loaded from sample_info.csv below.
useridMapping = {}

# Load the userID -> class mapping into useridMapping. The ``with`` block
# closes the label file once it has been consumed; the name ``labelInfo``
# stays bound (to the closed handle) afterwards.
with open(path_parent+"/data/sample_info.csv") as labelInfo:
    for line in labelInfo:
        # Skip blank lines (they would otherwise fail on info[1]) and any
        # line starting with 'u' -- presumably the CSV header row; verify
        # that no real user ID starts with 'u'.
        if not line.strip() or line.startswith('u'):
            continue
        info = line.strip().split(',')
        # The first occurrence of a user wins; later duplicates are ignored.
        if info[0] not in useridMapping:
            useridMapping[info[0]] = info[1]

# Total number of actions one user performed across all cats (or sellers, or brands).
def num(map):
    """Return the sum of all values stored in ``map``.

    ``map`` is a dict whose values are numeric counts; an empty dict
    yields 0.
    """
    return sum(map.values())

# Assign an index to every ID in the map (cat, seller or brand); `begin` is the first index number.
def creatIndex(map,begin=1):
    """Overwrite every value of ``map`` with a consecutive integer index.

    Keys are visited in the dict's own iteration order; the first key
    receives ``begin``, the next ``begin + 1`` and so on. The dict is
    modified in place and nothing is returned.
    """
    # Snapshot the keys first so the dict is never iterated while it is
    # being assigned to.
    for position, key in enumerate(list(map), begin):
        map[key] = position

# Persist the given object to a file.
def persistence(obj,filePath):
    """Pickle ``obj`` and write it to ``filePath``, replacing any existing file.

    The file is opened in binary mode: pickle output is bytes, so a
    text-mode handle fails on Python 3 and is subject to newline
    translation on Windows. The ``with`` block guarantees the handle is
    closed even if pickling raises.
    """
    with open(filePath,'wb') as sink:
        pickle.dump(obj,sink)

# Read a persisted file back.
def readPersistence(filePath):
    """Load and return the object pickled in ``filePath``.

    The file is read in binary mode and is closed before returning (the
    previous version leaked the handle).

    NOTE(review): a pickle that was written through a text-mode handle on
    Windows contains CRLF line endings and may need to be regenerated
    before it can be read in binary mode.
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # this script produced itself, never data from an untrusted source.
    with open(filePath,'rb') as source:
        return pickle.load(source)


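# Count how many actions each user performed on each cat, seller and brand.
# `weight` gives the weight of each action type; e.g. [1,1,1,1] means all four
# action types have weight 1 (0-click, 1-purchase, 2-favourite, 3-add to cart).
# Returns a dict keyed by userId whose value is a list of three dicts (for
# catId, sellerId and brandId, in that order), each mapping an ID to its
# action count.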
def features_export(weight):
    """Aggregate weighted action counts per user from the action log.

    Reads every row of the module-level ``mixInfo`` file. Columns used
    (0-based, comma separated): 0 = user ID, 2 = category ID, 3 = seller ID,
    4 = brand ID, 6 = action type.

    Args:
        weight: sequence indexed by the integer action type; the selected
            entry is added to the user's counters for that row. The
            original comments list the action codes as 0 = click,
            1 = purchase, 2 = favourite, 3 = add to cart.

    Returns:
        dict mapping user ID -> ``[catMap, sellerMap, brandMap]`` where each
        map goes from an ID to the accumulated weight for that user.

    Side effects:
        Registers every seen ID (with value 0) in the module-level
        ``allCatId`` / ``allSellerId`` / ``allBrandId`` dicts and pickles
        those dicts to "allCatId.txt", "allSellerId.txt" and
        "allBrandId.txt" in the current working directory.
    """
    # The log handle is shared at module level; rewind it so that a second
    # call (e.g. with different weights) sees the whole file instead of an
    # already exhausted iterator.
    mixInfo.seek(0)

    features = {}
    for line in mixInfo:
        # Skip blank lines (they would fail on the column lookups) and any
        # line starting with 'u' -- presumably the CSV header row.
        if not line.strip() or line.startswith('u'):
            continue
        info = line.strip().split(',')
        userId = info[0]
        catId = info[2]
        sellerId = info[3]
        brandId = info[4]
        actionWeight = weight[int(info[6])]

        # Remember every ID ever seen; the value is a placeholder that
        # creatIndex() later overwrites with the feature index.
        allCatId.setdefault(catId, 0)
        allSellerId.setdefault(sellerId, 0)
        allBrandId.setdefault(brandId, 0)

        if userId not in features:
            features[userId] = [{}, {}, {}]
        catMap, sellerMap, brandMap = features[userId]

        catMap[catId] = catMap.get(catId, 0) + actionWeight
        sellerMap[sellerId] = sellerMap.get(sellerId, 0) + actionWeight
        brandMap[brandId] = brandMap.get(brandId, 0) + actionWeight

    persistence(allCatId,"allCatId.txt")
    persistence(allSellerId,"allSellerId.txt")
    persistence(allBrandId,"allBrandId.txt")

    return features

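# Generate the data set in the format required by liblinear. IDtype selects which
# ID types to use: [1,1,1] selects cat, seller and brand; [1,0,1] selects only cat
# and brand. normalizedType selects the normalisation method. flag=0 generates a
# test set, flag=1 generates a training set.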
def _normalizedEntries(countMap, indexMap, normalizedType):
    """Return the "index:value" strings of one ID family for one user.

    ``countMap`` maps ID -> count for the user, ``indexMap`` maps ID ->
    feature index. Counts are divided by their sum when ``normalizedType``
    is 0 and by their maximum otherwise. Entries come back in ascending
    index order; IDs missing from ``indexMap`` are skipped.
    """
    if not countMap:
        return []
    # Compute the denominator once per user instead of once per feature.
    if normalizedType == 0:
        denominator = num(countMap)
    else:
        denominator = max(countMap.values())
    # A zero denominator cannot be divided by (it arises e.g. when every
    # contributing weight is 0); emit no entries for this family, which in
    # a sparse format means "all zero", instead of raising.
    if denominator == 0:
        return []
    indexed = sorted(
        (indexMap[key], count)
        for key, count in countMap.items()
        if key in indexMap
    )
    return [str(index)+':'+str(count*1.0/denominator) for index, count in indexed]


def generateFeatureVector(features,IDtype,flag,normalizedType=1):
    """Build sparse "label index:value ..." rows, one per user.

    Args:
        features: dict user ID -> [catMap, sellerMap, brandMap] as produced
            by features_export().
        IDtype: three flags; an entry equal to 1 selects the category,
            seller or brand family respectively (e.g. [1,0,1] selects
            categories and brands only).
        flag: 0 writes the constant label 0 for every user (test set);
            any other value uses the label from ``useridMapping`` and
            falls back to 1 for users without one (training set).
        normalizedType: 0 divides each count by the sum of the user's
            counts in that family, anything else divides by the maximum.

    Returns:
        list of rows; each row is a list whose first element is the label
        followed by "index:value" strings in ascending index order.

    Side effects:
        Reads "allCatId.txt", "allSellerId.txt" and "allBrandId.txt" from
        the current working directory and writes the user IDs, in row
        order, to ``<path_parent>/result/userIdList.txt``.
    """
    indexMaps = [
        readPersistence("allCatId.txt"),
        readPersistence("allSellerId.txt"),
        readPersistence("allBrandId.txt"),
    ]
    selected = [IDtype[0]==1, IDtype[1]==1, IDtype[2]==1]

    # Selected families share one contiguous index space starting at 1:
    # each family begins right after the previous selected family ends.
    nextIndex = 1
    for indexMap, isSelected in zip(indexMaps, selected):
        if isSelected:
            creatIndex(indexMap, nextIndex)
            nextIndex += len(indexMap)

    trainMat = []
    with open(path_parent+"/result/userIdList.txt","w") as userIdList:
        for userId in features:
            userIdList.write(userId+'\n')

            if flag==0:
                featureVector = [0]
            else:
                featureVector = [useridMapping.get(userId, 1)]

            for position, isSelected in enumerate(selected):
                if isSelected:
                    featureVector.extend(_normalizedEntries(
                        features[userId][position],
                        indexMaps[position],
                        normalizedType))

            trainMat.append(featureVector)
    return trainMat

# Write the computed features to a file.
def dataToFile(data,filePath):
    """Write ``data`` to ``<path_parent>/result/<filePath>``, one row per line.

    Args:
        data: iterable of rows; each row is a sequence whose items are
            converted with ``str`` and separated by single spaces.
        filePath: file name appended to the result directory.

    The ``with`` block closes the file even if a write fails.
    """
    with open(path_parent+"/result/"+filePath,"w") as out:
        for row in data:
            out.write(' '.join(str(item) for item in row))
            out.write("\n")

# One-off export step (currently disabled): aggregate the per-user counts with
# all four action weights set to 1 and cache the result in "features.txt".
# _features = features_export([1,1,1,1])
# persistence(_features,"features.txt")

# Load the cached per-user counts. "features.txt" is read from the current
# working directory and must already exist (see the disabled export above).
features = readPersistence("features.txt")

# Build the rows using all three ID families; the last argument is flag=1,
# i.e. labels are taken from useridMapping.
train_catSellerBrand = generateFeatureVector(features,[1,1,1],1)
# Disabled single-family variants (category only, seller only, brand only).
# train_cat = generateFeatureVector(features,[1,0,0],1)
# train_seller = generateFeatureVector(features,[0,1,0],1)
# train_brand = generateFeatureVector(features,[0,0,1],1)

# Write the rows to <path_parent>/result/train_catSellerBrand.txt.
dataToFile(train_catSellerBrand,"train_catSellerBrand.txt")
# dataToFile(train_cat,"train_cat.txt")
# dataToFile(train_seller,"train_seller.txt")
# dataToFile(train_brand,"train_brand.txt")
